/**
 * May 17, 2017
 */
package cn.edu.bjtu.tokenization;

import java.io.File;
import java.io.UnsupportedEncodingException;

import org.ansj.library.AmbiguityLibrary;
import org.ansj.library.DicLibrary;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;

import cn.edu.bjtu.core.LoggerSupport;

/**
 * Base class for tokenizers that run every token through an optional
 * {@link TokenPreProcess} and drop empty or single-character results.
 * <p>
 * Because of path differences between the Maven project layout and the
 * packaged artifact (details omitted here), loading the dictionaries kept
 * failing, so the custom dictionary and the ambiguity dictionary could not be
 * loaded. This class therefore force-registers both of them again in its
 * static initializer.
 *
 * @author Alex
 *
 */
public abstract class BaseFilterTokenizer extends LoggerSupport{

	/** Classpath location of the custom dictionary. */
	private static final String DEFAULT_DIC_RESOURCE = "/library/default.dic";

	/** Classpath location of the ambiguity dictionary. */
	private static final String AMBIGUITY_DIC_RESOURCE = "/library/ambiguity.dic";

	static{
		// Each dictionary is registered in its own try block so that a failure
		// on one of them does not prevent the other from being registered.
		try {
			DicLibrary.put(DicLibrary.DEFAULT, resolveResourcePath(DEFAULT_DIC_RESOURCE));
		} catch (Exception e) {
			reportLoadFailure(DEFAULT_DIC_RESOURCE, e);
		}
		try {
			AmbiguityLibrary.put(AmbiguityLibrary.DEFAULT, resolveResourcePath(AMBIGUITY_DIC_RESOURCE));
		} catch (Exception e) {
			reportLoadFailure(AMBIGUITY_DIC_RESOURCE, e);
		}
	}

	/** Optional pre-processor applied to every token by {@link #filter(String)}; may be {@code null}. */
	protected TokenPreProcess tokenPreProcess;

	/**
	 * Resolves a classpath resource to an absolute file-system path.
	 * <p>
	 * NOTE(review): {@code new File(URI)} only accepts {@code file:} URIs, so
	 * this throws {@link IllegalArgumentException} when the resource lives
	 * inside a JAR; the static initializer reports that failure instead of
	 * propagating it.
	 *
	 * @param resource absolute classpath name of the resource, e.g. {@code /library/default.dic}
	 * @return absolute path of the file backing the resource
	 * @throws IllegalStateException if the resource is not on the classpath
	 * @throws java.net.URISyntaxException if the resource URL is not a valid URI
	 */
	private static String resolveResourcePath(String resource) throws java.net.URISyntaxException {
		java.net.URL location = BaseFilterTokenizer.class.getResource(resource);
		if (location == null) {
			// Fail with a descriptive message instead of an anonymous NullPointerException.
			throw new IllegalStateException("Dictionary resource not found on classpath: " + resource);
		}
		return new File(location.toURI()).getAbsolutePath();
	}

	/**
	 * Reports a dictionary registration failure on standard error without
	 * aborting class initialization (loading stays best-effort).
	 *
	 * @param resource classpath name of the dictionary that failed to load
	 * @param cause    the exception raised while resolving or registering it
	 */
	private static void reportLoadFailure(String resource, Exception cause) {
		System.err.println("Failed to load dictionary resource " + resource);
		cause.printStackTrace();
	}

	/**
	 * Sets the pre-processor that {@link #filter(String)} applies to each token.
	 *
	 * @param tokenPreProcessor the pre-processor to use, or {@code null} to disable pre-processing
	 */
	public void setTokenPreProcessor(TokenPreProcess tokenPreProcessor) {
		this.tokenPreProcess = tokenPreProcessor;
	}

	/**
	 * Normalizes a single token.
	 * <p>
	 * The token is trimmed, then passed through the configured
	 * {@link TokenPreProcess} if one is set. The pre-processor's output is not
	 * trimmed again.
	 *
	 * @param x the raw token, may be {@code null}
	 * @return the processed token, or an empty string when the input is
	 *         {@code null} or the processed token is {@code null}, empty, or
	 *         exactly one character long
	 */
	protected String filter(String x) {
		if (x == null) {
			return "";
		}

		String token = x.trim();
		if (tokenPreProcess != null) {
			token = tokenPreProcess.preProcess(token);
		}

		// Null, empty and single-character tokens are all discarded.
		if (token == null || token.length() <= 1) {
			return "";
		}
		return token;
	}

}
